clear
version 14.0
set more off
set scheme s2mono
capture log close
log using "..\logs\cross validation.txt", text replace

* Specify the year of cash rent data to use in the regression
local reg_year 2013
* Specify 1 if irrigated or 0 if nonirrigated
local practice 0

use "..\dataAnalysis\rent_climate_data", clear

if `practice'==0{
gen ln_cash_rent=ln(cash_rent_non`reg_year')
}
if `practice'==1{
gen ln_cash_rent=ln(cash_rent_irr`reg_year')
}

*------------------------------------------------------------------------
* Controls to include in the regression
*------------------------------------------------------------------------
local deficit water_deficit
local surplus water_surplus
local soils_controls soc0_150_4 bulkDensity_4 ec_2 ph_less6 ph_greater7dot5 slope_1     
*local soils_controls $SelSoils



*------------------------------------------------------------------------
* Cross-Validation
*------------------------------------------------------------------------
tempname memhold
tempfile results
postfile `memhold' deficit_spline surplus_spline gdd_spline edd_spline temp ///
	rmse1 rmse2 rmse3 rmse4 rmse5 rmse6 rmse7 rmse8 rmse9 rmse10 ///
	rmse11 rmse12 rmse13 rmse14 rmse15 rmse16 rmse17 rmse18 rmse19 using `results', replace

forvalues w=2/6 { 
forvalues s=2/4 { 
forvalues g=2/6 {	
forvalues e=2/6 {
forvalues temp=30/36 {

	if `w'==2{
		capture drop `deficit'_spline*
		gen `deficit'_spline=`deficit'
	}
	if `w'>=3{
		capture drop `deficit'_spline*
		mkspline `deficit'_spline = `deficit', cubic nknots(`w')
	}
	
	if `s'==2{
		capture drop `surplus'_spline*
		gen `surplus'_spline=`surplus'
	}
	if `s'>=3{
		capture drop `surplus'_spline*
		mkspline `surplus'_spline = `surplus', cubic nknots(`s') 
	}
	
	if `g'==2{
		capture drop gdd_spline*
		gen gdd_spline = gdd
	}
	if `g'>=3{
		capture drop gdd_spline*
		mkspline gdd_spline = gdd, cubic nknots(`g') 
	}
	
	if `e'==2{
		capture drop dday`temp'C_spline*
		gen dday`temp'C_spline = dday`temp'C
	}
	if `e'>=3{
		capture drop dday`temp'C_spline*
		mkspline dday`temp'C_spline = dday`temp'C, cubic nknots(`e') 
	}

* Note that I need to use "iw" instead of "aw" for this code 	
qui cluster_crossfold reg ln_cash_rent `deficit'_spline* `surplus'_spline* gdd_spline* dday`temp'C_spline* `soils_controls' i.stfips ///
			 [iw=croplandNon_acres], cluster(stfips_clust) eweight(croplandNon_acres)  
matrix A=r(est)
post `memhold' (`w') (`s') (`g') (`e') (`temp') (A[1,1]) (A[2,1]) (A[3,1]) (A[4,1]) (A[5,1]) ///
		(A[6,1]) (A[7,1]) (A[8,1]) (A[9,1]) (A[10,1]) (A[11,1]) (A[12,1]) (A[13,1]) ///
		(A[14,1]) (A[15,1]) (A[16,1]) (A[17,1]) (A[18,1]) (A[19,1])
}
}
}
}
}

postclose `memhold'

use `results', clear

egen mean_rmse=rowmean(rmse*)
egen sd_rmse=rowsd(rmse*)
egen max_rmse=rowmax(rmse*)


sort mean_rmse
list deficit_spline surplus_spline gdd_spline edd_spline temp mean_rmse sd_rmse max_rmse in 1/15
sort sd_rmse
list deficit_spline surplus_spline gdd_spline edd_spline temp mean_rmse sd_rmse max_rmse in 1/15
sort max_rmse
list deficit_spline surplus_spline gdd_spline edd_spline temp mean_rmse sd_rmse max_rmse in 1/15


log close
